"""
    使用urllib对  燃文小说网站 的小说内容的爬取
    网站小说大全的网址 http://www.ranwenw.com/xiaoshuodaquan/
"""
import gzip
import urllib.request
import urllib.error
import io

# Program entry point: fetch the novel index page, keep a raw copy on disk,
# and print the (decompressed) response body.
# URL of the page to crawl
url = "http://www.ranwenw.com/xiaoshuodaquan/"
# Request headers, including a User-Agent, that mimic a browser visit
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36 Edg/93.0.961.52',
    'Cookie': 'UM_distinctid=17b686300aa1d8-0f6ca80b47cb95-7868786b-144000-17b686300ab898; CNZZDATA1259263205=1590965726-1629539521-null%7C1629544942',
    'Referer': 'http://www.ranwenw.com/',
    'Cache-Control': 'max-age=0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # Advertise only gzip: it is the only content encoding this script
    # decodes, so the server must not be invited to answer with deflate.
    'Accept-Encoding': 'gzip',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Connection': 'keep-alive'
}

# Wrap the request data in a Request object
request = urllib.request.Request(url=url, headers=headers)

# The context manager closes the HTTP response even if reading fails.
with urllib.request.urlopen(request) as response:
    content = response.read()
    content_encoding = response.headers.get('Content-Encoding', '')

# Keep a copy of the raw (possibly still compressed) response body on disk.
with open('index.html', 'wb') as file:
    file.write(content)

# The server may ignore Accept-Encoding and send an uncompressed body, so
# decompress only when the response is actually gzip (declared in the header,
# or recognisable by the gzip magic number), and do it in memory instead of
# re-reading the file that was just written.
if 'gzip' in content_encoding.lower() or content[:2] == b'\x1f\x8b':
    content = gzip.decompress(content)

# Print the page body as bytes; no character-set decoding is attempted here.
print(content)








